Avec la variable continue du CO2 élevé, on se dit que les expressions différentielles deux à deux auront moins d’intérêt que des analyses par splines. L’objective est donc de déterminer les profils des gènes différentiellements exprimés sur la base d’une interpolation polynomiale liée à l’élévation du CO2.
Données après correction des mix-up samples :
corrplot(cor(data), method = 'color')
tcc <- normalize(data, norm_method = 'tmm', iteration = FALSE)
conditions <- str_split_fixed(colnames(data), '_', 2)[,1]
tcc <- filter_low_counts(tcc, 10*length(conditions))
normalized_counts <- TCC::getNormalizedData(tcc)
draw_distributions(data) /
draw_distributions(normalized_counts)
draw_PCA(normalized_counts)
DF <- 2
FDR <- 0.01
CO2 <- as.numeric(str_split_fixed(conditions, '\\.', 2)[,1])
CO2_spline <- ns(CO2, df = DF)
N <- str_split_fixed(conditions, '\\.', 2)[,2]
design <- model.matrix(~CO2_spline*N)
dge <- edgeR::DGEList(
counts = normalized_counts,
norm.factors = rep(1,ncol(normalized_counts)))
y <- edgeR::estimateDisp(dge, design)
fit <- edgeR::glmQLFit(y, design)
CO2_coeffs <- 2:(1+DF)
N_coeff <- DF+2
CO2_N_coeffs <- (DF+3):ncol(design)
annot <- gene_annotations$`Arabidopsis thaliana`
CO2_dea <- edgeR::glmQLFTest(fit, coef = CO2_coeffs)$table %>%
filter(PValue < FDR)
CO2_dea[,c("label", "description")] <-
annot[rownames(CO2_dea), c("label", "description") ]
CO2_dea[c("label", "description", "PValue", "logCPM", "logFC.CO2_spline1", "logFC.CO2_spline2")] %>%
rename(CO2_spline1 = logFC.CO2_spline1,
CO2_spline2 = logFC.CO2_spline2) %>%
DT::datatable(
options = list(scrollX = TRUE)) %>%
DT::formatSignif(columns = c("PValue", "logCPM", "CO2_spline1", "CO2_spline2"),
digits = 4)%>%
DT::formatStyle(
columns = c("CO2_spline1"),
target = c("cell", "row"),
color = DT::styleInterval(cuts = c( 0 ),
values = c("#FF000035", "#72F02466")))%>%
DT::formatStyle(
columns = c("CO2_spline2"),
target = c("cell", "row"),
color = DT::styleInterval(cuts = c( 0 ),
values = c("#FF000035", "#72F02466")))
DEGs_CO2 <- rownames(CO2_dea)
N_dea <- edgeR::glmQLFTest(fit, coef = N_coeff)$table %>%
filter(PValue < FDR)
N_dea[,c("label", "description")] <-
annot[rownames(N_dea), c("label", "description") ]
DT::datatable(N_dea[c("label", "description", "PValue", "logCPM", "logFC")],
options = list(scrollX = TRUE)) %>%
DT::formatSignif(columns = c("PValue", "logCPM", "logFC"),
digits = 4)%>%
DT::formatStyle(
columns = c("logFC"),
target = c("cell", "row"),
color = DT::styleInterval(cuts = c( 0 ),
values = c("#FF000035", "#72F02466")))
DEGs_N <- rownames(N_dea)
CO2_N_dea <- edgeR::glmQLFTest(fit, coef = CO2_N_coeffs)$table %>%
filter(PValue < FDR)
CO2_N_dea[,c("label", "description")] <-
annot[rownames(CO2_N_dea), c("label", "description") ]
CO2_N_dea[c("label", "description", "PValue", "logCPM", "logFC.CO2_spline1.NMix", "logFC.CO2_spline2.NMix")] %>%
rename(CO2_spline1.NMix=logFC.CO2_spline1.NMix , CO2_spline2.NMix=logFC.CO2_spline2.NMix) %>%
DT::datatable(
options = list(scrollX = TRUE)) %>%
DT::formatSignif(columns = c("PValue", "logCPM", "CO2_spline1.NMix", "CO2_spline2.NMix"),
digits = 4)%>%
DT::formatStyle(
columns = c("CO2_spline1.NMix"),
target = c("cell", "row"),
color = DT::styleInterval(cuts = c( 0 ),
values = c("#FF000035", "#72F02466")))%>%
DT::formatStyle(
columns = c("CO2_spline2.NMix"),
target = c("cell", "row"),
color = DT::styleInterval(cuts = c( 0 ),
values = c("#FF000035", "#72F02466")))
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
DEGs_CO2_N <- rownames(CO2_N_dea)
draw_venn(list("DEGs_CO2"=DEGs_CO2, "DEGs_N"=DEGs_N, "DEGs_CO2_N"=DEGs_CO2_N))
ngenes <- read.table('../data/Ngenes.csv', h=T, sep = ';')
genes <- sample(DEGs_CO2, 5)
draw_genes <- function(genes, normalized_counts, ncol=NULL, labels = FASLE){
genes <- intersect(genes, rownames(normalized_counts))
data <- reshape2::melt(normalized_counts[genes,] ,
quiet =T, value.name = "Expression") %>%
rename(gene = Var1, condition = Var2) %>%
separate(condition, into = c("CO2", "Nutrition", "rep")) %>%
mutate(CO2 = as.numeric(CO2))
if(labels) {
data$label <- ngenes[match(data$gene, ngenes$AGI), "Gene"]
data %>%
ggplot(aes(x=CO2, y=Expression, color=Nutrition, fill = Nutrition)) +
geom_point() + geom_smooth(method = "lm", se = TRUE,
size = 1, alpha=0.1,
formula = y ~ splines::ns(x, df = DF)) +
facet_wrap(~label, scales = 'free', ncol = ncol) +
theme_pubr() + scale_fill_brewer(palette="Accent")+
scale_color_brewer(palette="Accent")+
labs(title = "Normalized gene expression in CO2 gradient experiment",
subtitle = paste("Splines interpolation with DF=", DF))
}
else{
data$label <- ngenes[match(data$gene, ngenes$AGI), "Gene"]
data %>%
ggplot(aes(x=CO2, y=Expression, color=Nutrition, fill = Nutrition)) +
geom_point() + geom_smooth(method = "lm", se = TRUE,
size = 1, alpha=0.1,
formula = y ~ splines::ns(x, df = DF)) +
facet_wrap(~gene, scales = 'free', ncol = ncol) +
theme_pubr() + scale_fill_brewer(palette="Accent")+
scale_color_brewer(palette="Accent")+
labs(title = "Normalized gene expression in CO2 gradient experiment",
subtitle = paste("Splines interpolation with DF=", DF))
}
}
draw_genes(ngenes[ngenes$Role == "nitrate uptake transport and metabolism","AGI"], normalized_counts, ncol = 4, labels = T)
draw_genes(ngenes[ngenes$Role == "negative regulator","AGI"], normalized_counts, ncol = 4, labels = T)
draw_genes(ngenes[ngenes$Role == "positive regulator","AGI"], normalized_counts, ncol = 3, labels = T)/
draw_genes(ngenes[ngenes$Role == "signalling","AGI"], normalized_counts, ncol = 3, labels = T)
bg <- convert_from_agi(rownames(normalized_counts))
##
entrez <- convert_from_agi(DEGs_CO2)
go_CO2 <- enrich_go(entrez, bg)
##
## Registered S3 method overwritten by 'ggforce':
## method from
## scale_type.units units
# DT::datatable(go_CO2,
# options = list(scrollX = TRUE))
draw_enrich_go(go_CO2)
entrez <- convert_from_agi(DEGs_N)
go_N <- enrich_go(entrez, bg)
# DT::datatable(go_N,
# options = list(scrollX = TRUE))
draw_enrich_go(go_N)
entrez <- convert_from_agi(DEGs_CO2_N)
go_CO2_N <- enrich_go(entrez, bg)
# DT::datatable(go_CO2_N,
# options = list(scrollX = TRUE))
draw_enrich_go(go_CO2)
draw_clusters <- function(normalized_counts, genes, clustering, ncol=NULL){
reshape2::melt(normalized_counts[genes,],
quiet =T, value.name = "Expression") %>%
rename(gene = Var1, condition = Var2) %>%
separate(condition, into = c("CO2", "Nutrition", "rep")) %>%
mutate(CO2 = as.numeric(CO2),
cluster = clustering$membership[gene]) %>%
group_by(gene) %>%
mutate(Expression= (Expression-mean(Expression))/sd(Expression)) %>%
group_by(cluster, CO2, Nutrition, rep) %>%
mutate(N_genes = n()) %>%
unite(Cluster_label, cluster, N_genes, sep = ', N=') %>%
ggplot(aes(x=CO2, y=Expression, color=Nutrition, fill = Nutrition)) +
geom_point(alpha=0.01)+geom_smooth(method = "lm", se = TRUE,
size = 1, alpha=0.1,
formula = y ~ splines::ns(x, df = DF)) +
facet_wrap(~Cluster_label, scales = 'free', ncol = ncol) +
theme_pubr() +scale_fill_brewer(palette="Accent")+
scale_color_brewer(palette="Accent")+
labs(title = "Normalized clusters expression in CO2 gradient experiment",
subtitle = paste("Splines interpolation with DF=", DF))
}
draw_clusters(normalized_counts, DEGs_CO2,
run_coseq(conds = conditions, data = normalized_counts,
genes = DEGs_CO2, K = 6, transfo = "arcsin",
model = "Normal", seed = 123)) + ggtitle("C02 DEGs : clusters profiles")
## ****************************************
## coseq analysis: Normal approach & arcsin transformation
## K = 6 to 6
## Use seed argument in coseq for reproducible results.
## ****************************************
draw_clusters(normalized_counts, DEGs_N,
run_coseq(conds = conditions, data = normalized_counts,
genes = DEGs_N, K = 4, transfo = "arcsin",
model = "Normal", seed = 123)) + ggtitle("N DEGs : clusters profiles")
## ****************************************
## coseq analysis: Normal approach & arcsin transformation
## K = 4 to 4
## Use seed argument in coseq for reproducible results.
## ****************************************
draw_clusters(normalized_counts, DEGs_CO2_N,
run_coseq(conds = conditions, data = normalized_counts,
genes = DEGs_CO2_N, K = 9, transfo = "arcsin",
model = "Normal", seed = 123)) + ggtitle("N*CO2 DEGs : clusters profiles")
## ****************************************
## coseq analysis: Normal approach & arcsin transformation
## K = 9 to 9
## Use seed argument in coseq for reproducible results.
## ****************************************